This project showcases a full bulk RNA-seq analysis pipeline as a portfolio piece for a CV. It includes QC, normalization, differential expression, interactive visualization, and interactive tables, demonstrating skills in R, visualization, and reproducible reporting.
For this showcase, we use the `airway` dataset from Bioconductor: human airway smooth muscle cells, comparing dexamethasone-treated samples with untreated controls.
# Differential expression: dexamethasone-treated vs untreated ----
# NOTE(review): assumes airway, DESeq2 and dplyr (which supplies %>%) are
# attached earlier in the document -- confirm.
data(airway)

# Make "untrt" the reference (first) level of the treatment factor.
airway$dex <- relevel(airway$dex, ref = "untrt")

# Build the DESeq2 dataset with treatment as the only design term.
dds <- DESeqDataSet(airway, design = ~ dex)

# Pre-filter: keep genes with a count of at least 10 in at least 4 samples.
keep <- rowSums(counts(dds) >= 10) >= 4
dds <- dds[keep, ]

# Fit the model and pull out the results table.
dds <- DESeq(dds)
res <- results(dds)

# Flatten the results into a plain data frame for plotting.
res_df <- as.data.frame(res) %>%
  dplyr::mutate(
    gene = rownames(.),
    # Classified before the NA replacement below: a missing padj matches
    # neither condition and therefore falls through to "NS".
    sig = dplyr::case_when(
      padj < 0.05 & log2FoldChange > 1 ~ "Up",
      padj < 0.05 & log2FoldChange < -1 ~ "Down",
      TRUE ~ "NS"
    ),
    # Replace missing adjusted p-values with 1 so that a later
    # -log10(padj) evaluates to 0 instead of NA.
    padj = ifelse(is.na(padj), 1, padj)
  )

# Variance-stabilising transformation of the filtered dataset.
# This was fused onto the closing parenthesis of mutate() above, which made
# the whole statement a parse error; it is its own statement now.
vsd <- vst(dds)
# Sample-level PCA ----
# NOTE(review): assumes DESeq2, ggplot2 and plotly are attached earlier in
# the document, and that `vsd` and `res_df` already exist -- confirm.
# returnData = TRUE makes plotPCA() hand back the plotting data instead of a
# finished plot, so the figure can be styled here.
pca_data <- plotPCA(vsd, intgroup = "dex", returnData = TRUE)

pca_plot <- ggplot(pca_data, aes(PC1, PC2, color = dex, label = name)) +
  geom_point(size = 5, alpha = 0.6) +
  geom_text(vjust = -1.5, size = 3) +
  scale_color_manual(values = c("#1f77b4", "#ff7f0e")) +
  ggtitle("PCA of Bulk RNA-seq Samples") +
  theme_minimal(base_size = 14)

# Display the PCA plot. This statement used to be fused with the volcano
# assignment below (`pca_plotvolc <- ...`), so the PCA plot was never shown
# and `volc` was never defined.
pca_plot

# Volcano plot ----
# `text` is not a ggplot2 aesthetic; it is mapped so that ggplotly() can show
# the gene identifier in the hover tooltip.
volc <- ggplot(
  res_df,
  aes(x = log2FoldChange, y = -log10(padj), color = sig, text = gene)
) +
  geom_point(alpha = 0.7, size = 3) +
  scale_color_manual(
    values = c("Up" = "#e41a1c", "Down" = "#377eb8", "NS" = "#4daf4a")
  ) +
  # Dashed guides at the thresholds used to classify `sig`:
  # |log2FC| = 1 and padj = 0.05.
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  theme_minimal(base_size = 14) +
  ggtitle("Bulk RNA-seq Volcano Plot") +
  xlab("Log2 Fold Change") +
  ylab("-Log10 Adjusted P-value")

# Convert to interactive plotly
volc_plotly <- ggplotly(volc, tooltip = c("text", "x", "y"))
volc_plotly